/*
 * gawk -- GNU version of awk
 * Copyright (C) 1986 Free Software Foundation
 *   Written by Paul Rubin, August 1986
 *
 * This will be free software eventually, but not until it is finished.
 */

%{
/* Compile the parser's trace support in.  YYDEBUG is given an explicit
   value because generated parsers may test it with #if, where an empty
   definition is a syntax error.  */
#define YYDEBUG 1

  /* Forward declarations for routines defined after the grammar, so
     that uses which come before the definitions see the right linkage
     and return type.  */
  static int yylex ();
  static int yyerror ();
  static int parse_escape ();

  /* Set by the action for `start' when a parse completes, and
     returned by parse_c_expression.  */
  int expression_value;

%}

/* Semantic value of a grammar symbol.  */
%union {
  long lval;		/* operator code or character value from yylex */
  int voidval;		/* not referenced by any rule or by yylex in this file */
  double fval;		/* numeric constant; double so that the result of
			   atof in yylex is kept at full precision */
  NODE *exp_val;	/* value of the `exp' and `start' nonterminals */
  char *sval;		/* pointer into the source text (NAME) */
}

%type <exp_val> exp start
%token <sval> NAME STRING
%token <lval> ERROR
/* Operator tokens whose value is read in actions; yylex leaves the
   operator code for them in yylval.lval.  */
%token <lval> RELOP MATCHOP INCDEC
%token <fval> NUMBER

/* Operator precedence, lowest first.  Assignment binds most loosely
   and groups to the right, so the whole expression on its right-hand
   side is what gets assigned.
   NOTE(review): ASSIGNOP is tagged <lval> on the assumption that it
   carries an operator code like the other operator tokens; yylex does
   not produce it yet -- confirm.  */
%right <lval> ASSIGNOP
%left OR
%left AND
%left EQUAL NOTEQUAL
%left '<' '>' LEQ GEQ
%left '+' '-'
%left '*' '/' '%'
%right UNARY

%%

/* Top of the grammar: a complete program.  The value of `program' is
   saved in expression_value, which parse_c_expression returns.
   NOTE(review): expression_value is declared int, while `start' is
   typed <exp_val> and `program' has no %type -- confirm the intended
   type.  */
start   :	program
		{ expression_value = $1; }
	;

/* A program is a possibly empty sequence of rules.  Its value is NULL
   for an empty program, the rule's own value for a single rule, and
   otherwise whatever append_rule returns for the rules seen so far
   plus the new one.
   NOTE(review): a lone rule can be derived both directly and as an
   empty program followed by a rule, which looks ambiguous to the
   parser generator -- confirm whether the `rule' alternative is
   needed.  */
program :
		/* empty */
		{ $$ = NULL ; }
	|	rule
			{ $$ = $1; }
	| 	program rule
			{ $$ = append_rule ($1, $2); }
	;

/* One rule: a pattern and an action, either of which may be empty,
   ended by a NEWLINE token.  make_rule combines the two values.  */
rule	:
		pattern action NEWLINE
			{ $$ = make_rule ($1, $2); }
	;

/* The pattern part of a rule: nothing at all, the BEGIN or END token,
   a selection (optionally negated, parenthesized, or joined to another
   selection with AND / OR), or two patterns separated by a comma.
   NOTE(review): no alternative has an action, so each takes the
   default value of its first component; for the negated and
   parenthesized forms that component is the punctuation token, which
   is presumably not what is wanted.  */
pattern :	/* empty */
	| BEGIN
	| END
	| selection
	| '!' selection
	| selection AND selection
	| selection OR selection
	| '(' selection ')'
	| pattern ',' pattern
	;


/* In the next 2 rules, want_regexp tells yylex to expect stuff
	enclosed by slashes and return a regexp token. */

/* A selection is either a bare regular expression or a relational
   expression.  */
selection :
		{ ++want_regexp; }
	REGEXP
		{ want_regexp = 0;
		  /* The mid-rule action above counts as the first
		     component, so the REGEXP token is the second.  */
		  $$ = make_regexp ($2);
		}
	| relational_expression
	;

/* A match of an expression against a regular expression, or a
   comparison of two expressions.  Both build a node from the left
   operand, the operator token's value, and the right operand.
   NOTE(review): `expression' has no rules in this file; only `exp' is
   defined -- confirm which nonterminal is meant.  */
relational_expression :
	expression MATCHOP
		 { ++want_regexp; }
	   REGEXP
		 { want_regexp = 0;
		   /* The mid-rule action is the third component, which
		      makes the REGEXP token the fourth.  */
		   $$ = node ($1, $2, make_regexp($4));
		 }
	| expression RELOP expression
		{ $$ = node ($1, $2, $3); }
	;



/* The action part of a rule: either absent, or statements enclosed in
   braces.  Neither alternative has an action of its own.
   NOTE(review): `statements' has no rules in this file.  */
action	:	/* empty */
	|	'{' statements '}'
	;

/* Expressions, not including the comma operator.  These are the
   primary and unary forms: a parenthesized expression, unary minus,
   prefix and postfix increment/decrement of a variable, and numeric
   and string constants.  */
exp	:	'(' exp ')'
			{ $$ = $2; }
	|	'-' exp    %prec UNARY
			{ $$ = node ($2, UNARY_MINUS, NULL); }
	|	INCDEC variable %prec UNARY
			{ $$ = node ($2, PRE_INCDEC, $1); }
	|	variable INCDEC  %prec UNARY
			/* Same argument layout as the prefix form: the
			   variable first, the INCDEC value last.  */
			{ $$ = node ($1, POST_INCDEC, $2); }
	|	NUMBER
			{ $$ = make_number ($1); }
	|	STRING
			{ $$ = make_string ($1); }
	;

/* A variable reference: a plain NAME, or a NAME subscripted by an
   expression in square brackets.  The subscripted form becomes an
   OP_SUBSCRIPT node over the variable and the subscript.  */
variable :
	 	NAME
			{ $$ = variable ($1); }
	|	NAME '[' exp ']'
			{ $$ = node (variable($1), OP_SUBSCRIPT, $3); }
	;

/* Binary operators in order of decreasing precedence.  Tokens written
   as character literals have no declared value type, so the operator
   handed to node is the literal itself; the token code of such a
   token is the character.  */
exp	:	exp '*' exp
			{ $$ = node ($1, '*', $3); }
	|	exp '/' exp
			{ $$ = node ($1, '/', $3); }
	|	exp '%' exp
			{ $$ = node ($1, '%', $3); }
	|	exp '+' exp
			{ $$ = node ($1, '+', $3); }
	|	exp '-' exp
			{ $$ = node ($1, '-', $3); }
	|	exp ASSIGNOP exp
			{ $$ = node ($1, $2, $3); }
	;
%%

/* During parsing of a gawk program, the pointer to the next character
   is in this variable.  */

static char *lexptr;


/* One entry of the lexer's operator table.  */
struct token {
  char *operator;		/* spelling of the operator in the source text */
  int value;			/* left in yylval.lval when the operator is seen */
  int class;			/* token type returned to the parser */
};

/* Supply NULL only when no header has defined it already, so that a
   header's definition is never redefined.  */
#ifndef NULL
#define NULL 0
#endif

/* Two-character operators recognized by yylex.  Each entry gives the
   spelling, the value left in yylval.lval, and the token type returned
   to the parser.  The entry whose spelling is NULL ends the table.  */
static struct token tokentab2[] = {
  {"&&", AND, AND},
  {"||", OR, OR},
  {"==", EQUAL, RELOP},
  {"!=", NOTEQUAL, RELOP},
  {"<=", LEQ, RELOP},
  {">=", GEQ, RELOP},
  {"!~", NOMATCH, MATCHOP},
  {"++", INCREMENT, INCDEC},
  {"--", DECREMENT, INCDEC},
  {NULL, ERROR}			/* end marker; its class member is left zero */
};

/* Read one token, getting characters through lexptr.

   Returns the token type for the parser, or 0 at the end of the
   input, and advances lexptr past the token.  The token's value, if
   it has one, is left in yylval:
     - operators leave their operator code, or the operator character
       itself, in yylval.lval;
     - CHAR leaves the character's value in yylval.lval;
     - NUMBER leaves the converted value in yylval.fval;
     - STRING and NAME leave a pointer to the first character of the
       token in yylval.sval.  The pointer refers to the source text
       itself, which is not terminated at the end of the token.

   Blanks, tabs and `#' comments are skipped.  Errors are reported
   through yyerror, which does not return; the `return ERROR'
   statements after it are only a safeguard.

   NOTE(review): the grammar also uses NEWLINE, REGEXP, '/', '{', '}'
   and ',' tokens, none of which is produced here yet.  */

static int
yylex ()
{
  register int c;
  register int namelen;
  register char *tokstart;
  register struct token *toktab;

 retry:

  tokstart = lexptr;
  c = *tokstart;
  /* See if it is a special token of length 2.  */
  for (toktab = tokentab2; toktab->operator != NULL; toktab++)
    if (c == *toktab->operator && tokstart[1] == toktab->operator[1]) {
      lexptr += 2;
      yylval.lval = toktab->value;
      return toktab->class;
    }

  switch (c) {
  case 0:
    return 0;

  case ' ':
  case '\t':
    lexptr++;
    goto retry;

  case '\'':
    lexptr++;
    c = *lexptr++;
    if (c == '\\')
      c = parse_escape (&lexptr);
    yylval.lval = c;
    c = *lexptr++;
    if (c != '\'') {
      yyerror ("Invalid character constant");
      return ERROR;
    }

    return CHAR;

  case '#':			/* it's a comment */
    while (*lexptr != '\n' && *lexptr != '\0')
      lexptr++;
    goto retry;

  case '*':
  case '%':
  case '(':
  case ')':
  case '+':
  case '-':
  case '[':
  case ']':
  case '=':
  case '!':
    /* Single-character tokens stand for themselves.  */
    yylval.lval = c;
    lexptr++;
    return c;

  case '<':
  case '>':
    yylval.lval = c;
    lexptr++;
    return RELOP;

  case '~':
    yylval.lval = c;
    lexptr++;
    return MATCHOP;

  case '"':
    lexptr++;
    while (*lexptr != '\0') {
      switch (*lexptr++) {
      case '\\':
	/* Skip the escaped character, but never step over the
	   terminating null.  */
	if (*lexptr != '\0')
	  lexptr++;
	break;
      case '\n':
	yyerror ("unterminated string");
	return ERROR;
      case '\"':
	yylval.sval = tokstart;
	return STRING;
      }
    }
    /* Ran into the end of the input before the closing quote.  */
    yyerror ("unterminated string");
    return ERROR;
  }

  if (c >= '0' && c <= '9') {
    /* It's a number.  Count the characters that belong to it: digits,
       at most one point, and at most one exponent marker with an
       optional sign.  */
    int seen_e = 0, seen_point = 0;
    for (namelen = 0; (c = tokstart[namelen]) != '\0'; namelen++) {
      switch (c) {
      case '.':
	if (seen_point)
	  goto got_number;
	++seen_point;
	break;
      case 'e':
      case 'E':
	if (seen_e)
	  goto got_number;
	++seen_e;
	if (tokstart[namelen+1] == '-' || tokstart[namelen+1] == '+')
	  namelen++;
	break;
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
	break;
      default:
	goto got_number;
      }
    }

got_number:
    /* namelen indexes the first character after the number, which is
       where scanning must resume.  */
    lexptr = tokstart + namelen;
    yylval.fval = atof(tokstart);
    return NUMBER;
  }

  /* The <ctype.h> tests require a value in the range of unsigned
     char, so a character with the high bit set must not be passed as
     a negative number.  */
  if (!isalpha((unsigned char) c)) {
    yyerror ("Invalid token in expression\n");
    return ERROR;
  }

  /* It's a name.  See how long it is.  */

  for (namelen = 0; isalnum((unsigned char) tokstart[namelen]); namelen++)
    ;

  lexptr += namelen;
  /* As for STRING, hand the parser the start of the token.
     NOTE(review): the name's length is not passed along; whatever
     consumes the value has to find the end of the name itself.  */
  yylval.sval = tokstart;
  return NAME;
}


/* Parse a C escape sequence.  STRING_PTR points to a variable
   containing a pointer to the string to parse, positioned just after
   the backslash.  That pointer is updated past the characters we use.
   The value of the escape sequence is returned.

   A negative value means there is no character to use:
     -2  the sequence \ newline was seen, which is supposed to be
	 equivalent to nothing at all;
     -1  the \ was followed by a null character; the string pointer
	 is left pointing at the null character.

   If \ is followed by 000, we return 0 and leave the string pointer
   after the zeros.  A value of 0 does not mean end of string.

   \^C yields the control character for C (and \^? yields 0177);
   C may itself be written as an escape sequence.  Up to three octal
   digits are read for a numeric escape.  Any other character after
   the backslash stands for itself.  */

static int
parse_escape (string_ptr)
     char **string_ptr;
{
  register int c = *(*string_ptr)++;
  switch (c)
    {
    case 'a':
      return '\a';
    case 'b':
      return '\b';
    case 'e':
      return 033;
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case 'v':
      return '\v';
    case '\n':
      return -2;
    case 0:
      /* Back up so the caller still sees the end of the string, and
	 report it with a negative value: 0 would be indistinguishable
	 from the escape \000.  */
      (*string_ptr)--;
      return -1;
    case '^':
      c = *(*string_ptr)++;
      if (c == '\\')
	c = parse_escape (string_ptr);
      if (c == '?')
	return 0177;
      /* Keep the high bit and the low five bits.  */
      return (c & 0200) | (c & 037);

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      {
	register int i = c - '0';
	register int count = 0;
	/* The first digit is already in I; take at most two more.  */
	while (++count < 3)
	  {
	    if ((c = *(*string_ptr)++) >= '0' && c <= '7')
	      {
		i *= 8;
		i += c - '0';
	      }
	    else
	      {
		/* Not an octal digit: leave it for the caller.  */
		(*string_ptr)--;
		break;
	      }
	  }
	return i;
      }
    default:
      return c;
    }
}

/* Report the error described by S through error, then abandon the
   parse by jumping back to the setjmp made in parse_c_expression.
   Control never returns to the caller; the return type is written out
   only because implicit int is no longer accepted by current C
   standards.  */
static int
yyerror (s)
     char *s;
{
  error (s);
  longjmp (parse_error_return, 1);
}

/* This page contains the entry point to this file.  */

/* Entry point: parse the text in STRING and return the value the
   grammar leaves in expression_value.  The result is 0 when STRING is
   null or empty, and when a scanning or parsing error unwinds back to
   this function.  Text left over after a successful parse is
   reported, but does not change the result.  */
int
parse_c_expression (string)
     char *string;
{
  lexptr = string;

  if (lexptr == 0 || *lexptr == 0)
    {
      error ("empty #if expression\n");
      return 0;
    }

  /* yyerror jumps back to this point after printing its message, so
     a nonzero result from setjmp needs no further report.  */
  if (setjmp (parse_error_return) != 0)
    return 0;

  /* As things stand yyparse does not return a failure status, because
     yyerror never returns to it; the check is kept as a safeguard.  */
  if (yyparse () != 0)
    return 0;

  if (*lexptr != 0)
    error ("Junk after end of expression.");

  /* Set by the parser's action for `start'.  */
  return expression_value;
}

#ifdef TEST_EXP_READER
/* main program, for testing purposes.  Reads one expression per line
   from standard input, parses it and prints the result, until the
   input ends.  */
int initialize_random_junk ();

int
main ()
{
  int n;
  int c;
  char buf[1024];
  extern int yydebug;
/*
  yydebug = 1;
*/
  initialize_random_junk ();

  for (;;) {
    printf("enter expression: ");
    n = 0;
    /* The character is kept in an int so that the end-of-file
       indication, a negative value, can be told apart from data.
       Once the buffer is full the rest of the line is read and
       dropped rather than stored past the end of buf.  */
    while ((c = getchar()) >= 0 && c != '\n')
      if (n < (int) sizeof buf - 1)
	buf[n++] = c;
    buf[n] = '\0';
    /* Input ended with nothing pending: we are done.  */
    if (c < 0 && n == 0)
      break;
    printf("parser returned %d\n", parse_c_expression(buf));
    /* Input ended in the middle of a line that has now been handled.  */
    if (c < 0)
      break;
  }
  return 0;
}

/* table to tell if char can be part of a C identifier. */
char is_identchar[256];
/* table to tell if char can be first char of a c identifier. */
char is_identstart[256];
/* table to tell if c is horizontal space.  isspace() thinks that
   newline is space; this is not a good idea for this program. */
char is_hor_space[256];

/*
 * Fill in the three character tables above.  Letters and `_' may
 * start an identifier; letters, digits and `_' may continue one;
 * blank and tab are horizontal space.  Every other entry is left as
 * it was.
 *
 * The tables are faster than saying (is_alpha(c) || c == '_'), etc.
 * They must be set up before calling any routine that refers to them.
 * Entries are stored as 1 rather than incremented, so calling this
 * more than once leaves the tables unchanged.  Always returns 0.
 */
int
initialize_random_junk ()
{
  register int i;

  /* Walk the lower-case letters and mark the matching upper-case
     letter along with each one.  */
  for (i = 'a'; i <= 'z'; i++) {
    is_identchar[i - 'a' + 'A'] = 1;
    is_identchar[i] = 1;
    is_identstart[i - 'a' + 'A'] = 1;
    is_identstart[i] = 1;
  }
  /* Digits may appear in an identifier but may not begin one.  */
  for (i = '0'; i <= '9'; i++)
    is_identchar[i] = 1;
  is_identchar['_'] = 1;
  is_identstart['_'] = 1;

  /* horizontal space table */
  is_hor_space[' '] = 1;
  is_hor_space['\t'] = 1;

  return 0;
}

/* Print MSG on standard output, prefixed with "error: " and followed
   by a newline.  This definition is compiled only when
   TEST_EXP_READER is defined.  MSG is declared as a string
   explicitly: left undeclared it would default to int, which is not
   what the %s conversion expects.  Always returns 0.  */
int
error (msg)
     char *msg;
{
  printf("error: %s\n", msg);
  return 0;
}
#endif
